Metlin has a ton of useful data, but it’s trapped behind an awkward interactive JavaScript interface on their website. The functions here allow arbitrary amounts of data to be retrieved from the website in a scripted fashion.
Two notes. First, I’m pretty sure this would not make Metlin happy. I’m not sure it’s technically illegal, but I’d rather not have to figure that out. Second, Metlin has rate limits in place that, if exceeded, will block an IP address for about a day. I triggered this while trying to pull down the data for 20 amino acids, 5 times, within a minute, so I’m not sure how generous or aggressive the limits actually are.
library(getMetlin)
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
# First step: look up Metlin records, either by compound name or by mass.
# Here the lookup is by mass; only the first few hits are shown.
getMetlinMz(117.078979) %>%
  head()
## [1] "Metlin returned 145 compound(s) between 117.078686302552 and 117.079271697448 m/z with 1 unique formula(s): C5H11NO2"
## [1] "Of those, 5 have experimental MS/MS data: L-Valine, Betaine, N-Methyl-a-aminoisobutyric acid, 5-Aminopentanoic acid, L-Norvaline"
## cmpd_id exact_mass cmpd_name formula CAS
## 1 35 117.078978601 L-Valine C5H11NO2 72-18-4
## 2 287 117.078978601 Betaine C5H11NO2 590-46-5
## 3 6508 117.078979 N-Methyl-a-aminoisobutyric acid C5H11NO2
## 4 6762 117.078978601 isoamyl nitrite C5H11NO2
## 5 6902 117.078978601 5-Aminopentanoic acid C5H11NO2
## 6 35940 117.078965 4R-aminopentanoic acid C5H11NO2
## KEGG MSMS Structure
## 1 C00183 experimental
## 2 C00719 experimental
## 3 experimental
## 4 C07457 NO
## 5 C00431 experimental
## 6 NO
# The same kind of lookup, this time by compound name
getMetlinName("Betaine") %>%
  head()
## [1] "Metlin returned 35 compound(s) with name Betaine with 33 unique formula(s): C11H21NO9, C12H17NO3, C12H17NO5, C16H12ClNO3, C16H15NO3S, C17H15NO3, C18H15NO5, C18H34N2O3, C26H53NO2, C42H81NO7, C42H81NO8, C44H81NO7, C46H79NO8, C48H83NO7, C4H9O2Se, C5H10N2S2, C5H11AsO2, C5H11NO, C5H11NO2, C5H11O2Se, C5H12ClNO, C5H12ClNO2, C5H12NO, C5H13NO3, C6H13AsO3, C6H13NO2, C6H14NO2, C7H13NO2, C7H14Cl3NO4, C7H15NO2, C7H15NO3, C9H20NO2, C9H22INO2"
## [1] "Of those, 5 have experimental MS/MS data: L-Carnitine, Betaine aldehyde, Betaine, Arsenobetaine, gamma-Butyrobetaine Ethyl Ester Chloride"
## cmpd_id exact_mass cmpd_name formula
## 1 52 161.105193351 L-Carnitine C7H15NO3
## 2 278 101.084063979 Betaine aldehyde C5H11NO
## 3 287 117.078978601 Betaine C5H11NO2
## 4 966 145.110278729 4-Trimethylammoniobutanoic acid C7H15NO2
## 5 7089 143.094628665 Proline betaine C7H13NO2
## 6 46617 711.601303951 DGTS(16:0/16:0) C42H81NO7
## CAS KEGG MSMS Structure
## 1 541-15-1 C00318 experimental
## 2 7418-61-3 C00576 experimental
## 3 590-46-5 C00719 experimental
## 4 407-64-7 C01181
## 5 C10172 NO
## 6 NO
# Next, fetch MS/MS data for a record that actually has some: repeat the mass
# search, keep only the hits whose MSMS column reads "experimental", and take
# the first of those as the example compound.
sample_ms2_cmpd <- getMetlinMz(117.078979) %>%
  filter(MSMS == "experimental") %>%
  slice(1L)
## [1] "Metlin returned 145 compound(s) between 117.078686302552 and 117.079271697448 m/z with 1 unique formula(s): C5H11NO2"
## [1] "Of those, 5 have experimental MS/MS data: L-Valine, Betaine, N-Methyl-a-aminoisobutyric acid, 5-Aminopentanoic acid, L-Norvaline"
# Convert the compound ID via character first; presumably cmpd_id comes back
# as a factor, in which case a direct as.numeric() would return the internal
# level code instead of the ID itself.
metlin_id <- sample_ms2_cmpd$cmpd_id %>%
  as.character() %>%
  as.numeric()
# Download the MS2 fragment table for that compound
sample_ms2 <- getMetlinMS2(metlin_id)
## [1] "Metlin had 6 MS2 records for this compound, with collision energies of +0, +10, +20, +40, -0, -10"
# Preview the first few fragment rows
sample_ms2 %>%
  head()
## polarity adduct voltage frag_mass frag_int
## 1 + [M+H] 0 118.18773 0.518033
## 2 + [M+H] 0 118.08622 100.000000
## 3 + [M+H] 0 72.08146 61.057000
## 4 + [M+H] 0 55.05462 1.623280
## 5 + [M+H] 10 118.08630 4.202360
## 6 + [M+H] 10 72.16100 0.658412
# Plot it: one stick spectrum per collision voltage (positive mode only),
# stacked vertically beneath the compound name and sharing one m/z axis.
split_volt_pos_ms2 <- sample_ms2 %>%
  subset(polarity == "+") %>%
  split(.$voltage)
n_panels <- length(split_volt_pos_ms2)

# Figure 1 (the title) is assigned the first and last layout rows; each
# spectrum gets two rows in between. The spare bottom row presumably leaves
# room for the x axis that is drawn after the loop.
# `layout` is namespace-qualified because plotly, attached further down this
# script, masks graphics::layout; re-running this section in the same session
# would otherwise pick up the wrong function.
# seq_len() keeps the matrix well-formed even when there are no panels.
graphics::layout(
  matrix(c(1, rep(seq_len(n_panels) + 1, each = 2), 1), ncol = 1)
)
# Remember the previous margins so they can be restored once plotting is done
old_par <- par(mar = c(0.1, 4.1, 0.1, 0.1))

# Title panel
plot.new()
text(x = 0.5, y = 1, labels = sample_ms2_cmpd$cmpd_name, cex = 3)

# All panels share the same x range, computed once from the full MS2 table
mz_range <- c(0, max(sample_ms2$frag_mass))
for (spectrum in split_volt_pos_ms2) {
  # Empty frame first (type = "n"); the fragments are added as vertical sticks
  plot(spectrum$frag_mass, spectrum$frag_int,
    xlab = "",
    ylab = paste("Voltage", unique(spectrum$voltage)),
    xlim = mz_range, ylim = c(0, 120),
    xaxt = "n", yaxt = "n", type = "n"
  )
  segments(
    x0 = spectrum$frag_mass, x1 = spectrum$frag_mass,
    y0 = 0, y1 = spectrum$frag_int
  )
  axis(side = 2, at = c(0, 50, 100), labels = c(0, 50, 100))
}
# A single x axis, attached to the last panel drawn
axis(side = 1)

# Put the graphics state back so later base plots are not affected
par(old_par)
graphics::layout(1)
library(ggplot2)
## Registered S3 methods overwritten by 'ggplot2':
## method from
## [.quosures rlang
## c.quosures rlang
## print.quosures rlang
# The same positive-mode spectra drawn with ggplot2: each fragment is a
# vertical segment from zero up to its intensity, with one facet per voltage
# stacked in a single column and a common x range starting at zero.
gp <- sample_ms2 %>%
  filter(polarity == "+") %>%
  # Optional: restrict to a single collision voltage
  # filter(voltage == 20) %>%
  ggplot() +
  geom_segment(
    aes(x = frag_mass, xend = frag_mass, y = frag_int, yend = 0)
  ) +
  # Optional: draw a baseline at zero intensity
  # geom_hline(yintercept = 0) +
  facet_wrap(~voltage, ncol = 1) +
  theme_bw() +
  xlim(0, max(sample_ms2$frag_mass))
gp
library(plotly)
##
## Attaching package: 'plotly'
## The following object is masked from 'package:ggplot2':
##
## last_plot
## The following object is masked from 'package:stats':
##
## filter
## The following object is masked from 'package:graphics':
##
## layout
# Turn the ggplot into an interactive plotly widget, asking for only the
# "y" and "x" entries in the tooltip
gp %>%
  ggplotly(tooltip = c("y", "x"))